This notebook evaluates the behavioural predictions (RT, accuracy) that follow from the predicted rates of forgetting. We simulate the behavioural predictions that the adaptive fact learning system would have made, if it had used the predicted rate of forgetting as the starting estimate in the learning sequence.
library(fst)
fst package v0.9.8
library(data.table)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.14.8 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
library(tidyr)
library(purrr)
Attaching package: ‘purrr’
The following object is masked from ‘package:data.table’:
transpose
library(furrr)
Loading required package: future
library(stringr)
library(ggplot2)
library(patchwork)
library(wesanderson)
library(lme4)
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
library(lmerTest)
Attaching package: ‘lmerTest’
The following object is masked from ‘package:lme4’:
lmer
The following object is masked from ‘package:stats’:
step
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Attaching package: ‘survival’
The following object is masked from ‘package:future’:
cluster
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:patchwork’:
area
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(emmeans)
source(file.path("..", "scripts", "99_slimstampen_model_funs.R"))
Attaching package: ‘dplyr’
The following object is masked from ‘package:MASS’:
select
The following objects are masked from ‘package:data.table’:
between, first, last
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Parallel backend for future_map_dfr() below
future::plan("multisession", workers = 9) # Set to desired number of cores
# Light plot theme with black facet-strip text as the session default
theme_set(theme_light(base_size = 14) +
theme(strip.text = element_text(colour = "black")))
# Five colours for the five prediction methods; swap 2/4/5 to reorder the palette
condition_colours <- wes_palette("Darjeeling1", n = 5)
condition_colours[c(2, 4, 5)] <- condition_colours[c(4, 5, 2)]
# Two colours for the two courses (French / English)
dataset_colours <- wes_palette("Darjeeling2", n = 5)[c(2, 3)]
# Load the formatted trial data for one course and attach every type of
# rate-of-forgetting (ROF) prediction to each trial.
#
# course: course name (e.g. "Grandes Lignes"); spaces become "_" when
#         building the .fst file names under ../data.
# Returns: a data.table with one row per trial per available prediction,
#          carrying pred_user, pred_fact, pred_fact_user (from the merged
#          prediction files) plus constant pred_domain and pred_default.
load_data_with_predictions <- function (course) {
# Data
d_full <- read_fst(file.path("..", "data", paste0("formatted_", str_replace_all(course, " ", "_"), ".fst")))
setDT(d_full)
# Keep only trials with a known fact, and only the columns used downstream
d <- d_full[!is.na(fact_id), .(user_id, fact_id, start_time, rt, correct)]
rm(d_full)
# Free the full table before loading the (large) prediction files
gc()
setorder(d, user_id, fact_id, start_time)
# ROF predictions
pred_user <- read_fst(file.path("..", "data", "predictions", paste0("pred_v_obs_user_", str_replace_all(course, " ", "_"), ".fst")))
pred_fact <- read_fst(file.path("..", "data", "predictions", paste0("pred_v_obs_fact_", str_replace_all(course, " ", "_"), ".fst")))
pred_fact_user <- read_fst(file.path("..", "data", "predictions", paste0("pred_fact_and_user_", str_replace_all(course, " ", "_"), ".fst")))
setDT(pred_user)
setDT(pred_fact)
setDT(pred_fact_user)
# Remove NA and duplicates
pred_user <- unique(pred_user[!is.na(alpha)])
pred_fact <- unique(pred_fact[!is.na(alpha)])
pred_fact_user <- unique(pred_fact_user[!is.na(alpha)])
# Make Domain prediction
# Domain-level ROF = mean of the per-fact predictions (one value per fact)
pred_domain <- mean(unique(pred_fact, by = c("fact_id"))$pred_fact)
# Default starting estimate used by the adaptive system
pred_default <- 0.3
# Combine
# Disambiguate the two n_train_obs columns before merging
setnames(pred_user, "n_train_obs", "n_train_obs_user")
setnames(pred_fact, "n_train_obs", "n_train_obs_fact")
# Full outer joins: keep observations that have only one kind of prediction
pred_all <- merge(pred_user, pred_fact, by = c("user_id", "fact_id", "alpha", "n_reps"), all = TRUE)
pred_all <- merge(pred_all, pred_fact_user, by = c("user_id", "fact_id", "alpha"), all = TRUE)
pred_all[, pred_default := pred_default]
pred_all[, pred_domain := pred_domain]
# Trial-level columns expected by the simulation (text/threshold are
# placeholders required by the slimstampen model functions)
d_prep <- d[, .(user_id,
fact_id,
text = "",
start_time,
rt,
correct,
threshold = -0.8)]
# Inner join: only trials that have at least one prediction survive
d_pred <- merge(pred_all, d_prep, by = c("user_id", "fact_id"))
return(d_pred)
}
# Simulate the behavioural predictions (activation, accuracy, RT) that the
# adaptive fact learning system would have made at trial 3 of each
# (user, fact) learning sequence, once per prediction method.
#
# d: data.table from load_data_with_predictions(); one row per trial with
#    prediction columns pred_user through pred_domain.
# Returns: a data.table with one row per (user, fact, prediction method),
#          with human-readable condition labels attached.
predict_behaviour <- function (d) {
  # Process the data in manageable chunks
  chunk_size <- 1e4
  obs <- unique(d[, .(user_id, fact_id)])
  # Chunk start rows, plus a one-past-the-end sentinel
  obs_chunks <- c(seq(1, nrow(obs), by = chunk_size), nrow(obs) + 1)
  pred_beh <- map_dfr(seq_len(length(obs_chunks) - 1), function (i) {
    msg <- paste("Chunk", i, "/", length(obs_chunks) - 1)
    # Progress message via the shell so it is visible during parallel runs
    system(paste("echo", msg))
    # BUG FIX: the original `obs_chunks[i]:obs_chunks[i+1]-1` parses as
    # `(a:b) - 1` because `:` binds tighter than binary `-`, so every chunk
    # after the first re-included the last row of the previous chunk and
    # those sequences were simulated twice. Parentheses restore the intent.
    obs_i <- obs[obs_chunks[i]:(obs_chunks[i + 1] - 1)]
    d_i <- d[obs_i, on = .(user_id, fact_id)]
    # One learning sequence per (user, fact); simulated in parallel
    d_i_list <- split(d_i, by = c("user_id", "fact_id"), drop = TRUE)
    pred_beh_i <- future_map_dfr(d_i_list, function (learn_seq) {
      # Organise predictions for this sequence: one row per prediction method
      learn_seq_preds <- pivot_longer(
        learn_seq[1, ],
        pred_user:pred_domain,
        names_to = "prediction_type",
        names_prefix = "pred_",
        values_to = "predicted_alpha"
      )
      setDT(learn_seq_preds)
      # Look only at trial 3 in each sequence
      trial <- learn_seq[3, ]
      # Time elapsed between trials 2 and 3
      delay <- learn_seq[2:3, diff(start_time)]
      # Calculate behavioural predictions for each prediction method
      map_dfr(seq_len(nrow(learn_seq_preds)), function (j) {
        prediction_type <- learn_seq_preds[j, prediction_type]
        predicted_alpha <- learn_seq_preds[j, predicted_alpha]
        # Defaults when this method has no prediction for this sequence
        predicted_activation <- NA
        predicted_accuracy <- NA
        predicted_rt <- NA
        if (!is.na(predicted_alpha)) {
          # Activation at trial 3, given the first two encounters and the
          # predicted rate of forgetting
          predicted_activation <- calculate_activation(
            time = trial[, start_time],
            id = trial[, fact_id],
            factalpha = predicted_alpha,
            responses = learn_seq[1:2, ]
          )
          predicted_accuracy <- p_recall(
            activation = predicted_activation,
            threshold = -0.8,
            # activation_noise = 0.5
            activation_noise = 0.2
          )
          predicted_rt <- estimate_reaction_time_from_activation(
            activation = predicted_activation,
            reading_time = 300
          )
        }
        return(
          list(
            user_id = trial[, user_id],
            fact_id = trial[, fact_id],
            delay = delay,
            correct = trial[, correct],
            rt = trial[, rt],
            prediction_type = prediction_type,
            predicted_alpha = predicted_alpha,
            predicted_activation = predicted_activation,
            predicted_accuracy = predicted_accuracy,
            predicted_rt = predicted_rt
          )
        )
      })
    })
  })
  setDT(pred_beh)
  # Remove rows without prediction
  pred_beh <- pred_beh[!is.na(predicted_alpha)]
  pred_beh <- pred_beh[!is.infinite(predicted_rt)]
  # Set proper condition labels
  condition_labels <- data.table(
    prediction_type = c("default", "domain", "fact", "user", "fact_user"),
    prediction_label = factor(
      c("Default", "Domain", "Fact", "Learner", "Fact & Learner"),
      levels = c("Default", "Domain", "Fact", "Learner", "Fact & Learner")
    )
  )
  pred_beh <- pred_beh[condition_labels, on = .(prediction_type)]
  return(pred_beh)
}
Calculate behavioural predictions for trial 3:
# On-disk cache paths for the (expensive) simulated behavioural predictions
pred_gl_beh_path <- file.path("..", "data", "predictions", "pred_behaviour_gl.fst")
pred_ss_beh_path <- file.path("..", "data", "predictions", "pred_behaviour_ss.fst")
# Grandes Lignes (French): simulate once, then read from cache on re-runs
if (!file.exists(pred_gl_beh_path)) {
pred_gl <- load_data_with_predictions("Grandes Lignes")
pred_gl_beh <- predict_behaviour(pred_gl)
write_fst(pred_gl_beh, pred_gl_beh_path)
} else {
pred_gl_beh <- read_fst(pred_gl_beh_path)
setDT(pred_gl_beh)
}
fstcore package v0.9.14
(OpenMP was not detected, using single threaded mode)
# Stepping Stones (English): simulate once, then read from cache on re-runs
if (!file.exists(pred_ss_beh_path)) {
pred_ss <- load_data_with_predictions("Stepping Stones")
pred_ss_beh <- predict_behaviour(pred_ss)
write_fst(pred_ss_beh, pred_ss_beh_path)
} else {
pred_ss_beh <- read_fst(pred_ss_beh_path)
setDT(pred_ss_beh)
}
# Combine both courses into one table; `:=` adds the course label by reference
pred_beh <- rbind(pred_gl_beh[, course := "French"],
pred_ss_beh[, course := "English"])
rm(pred_gl_beh, pred_ss_beh)
# Reclaim memory from the per-course tables
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 2682960 143.3 4903551 261.9 NA 4128751 220.5
Vcells 242583431 1850.8 655809036 5003.5 32768 565796171 4316.7
# Distribution of predicted activation at trial 3, per course and prediction
# method, restricted to [-2, 0] where most of the mass lies.
p_act_dist <- ggplot(pred_beh[between(predicted_activation, -2, 0)], aes(x = predicted_activation, fill = prediction_label)) +
  facet_grid(course ~ prediction_label, scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .01) +
  guides(fill = "none") +
  # Fixed axis label: the x-axis shows activation, not RT (copy-paste error)
  labs(x = "Predicted activation",
       y = "Density") +
  scale_fill_manual(values = condition_colours)
# p_act_dist
ggsave(plot = p_act_dist, file.path("..", "output", "activation_distribution.png"),
       device = "png", width = 7.5, height = 4.5)
rm(p_act_dist)
Distribution of observed correct RT (truncated at 25 seconds for readability):
# Distribution of observed RT on correct trials, per course (RT in seconds,
# truncated to 0-25 s for readability).
p_rt_dist <- ggplot(pred_beh[correct == 1 & between(rt, 0, 25000)], aes(x = rt/1000)) +
  facet_grid(course ~ ., scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .1) +
  guides(fill = "none") +
  labs(x = "RT (in s)",
       y = "Density")
# p_rt_dist
ggsave(plot = p_rt_dist, file.path("..", "output", "rt_observed_distribution.png"),
       device = "png", width = 6, height = 6)
rm(p_rt_dist)
# Distribution of predicted RT, per course (in seconds).
p_rt_pred_dist <- ggplot(pred_beh, aes(x = predicted_rt/1000)) +
  facet_grid(course ~ ., scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .1) +
  guides(fill = "none") +
  labs(x = "Predicted RT (in s)",
       y = "Density")
# p_rt_pred_dist
ggsave(plot = p_rt_pred_dist, file.path("..", "output", "rt_predicted_distribution.png"),
       device = "png", width = 6, height = 6)
rm(p_rt_pred_dist)
Calculate absolute prediction error:
# RT prediction error on correct responses with plausible RT (0-25 s).
# The chained `[...]` filters first; `:=` then adds the error columns
# by reference to the filtered table.
pred_rt_error <- (pred_beh
[correct == 1]
[between(rt, 0, 25000)]
[, rt_pred_error := predicted_rt - rt]
[, abs_rt_pred_error := abs(rt_pred_error)]
)
# Mean absolute error per course and method.
# NOTE(review): ae_se divides sd by .N rather than sqrt(.N) — if this is
# meant to be a standard error of the mean, confirm the formula.
pred_rt_error_avg <- pred_rt_error[, .(mae = mean(abs_rt_pred_error),
ae_se = sd(abs_rt_pred_error)/.N),
by = .(course, prediction_label)]
n_obs <- pred_rt_error[, .N, by = .(course, prediction_label)]
# plot_range <- range(pred_beh$predicted_rt/1000, na.rm = TRUE)
# Fixed plotting range and breaks (seconds) shared by the panels below
plot_range <- c(0, 10)
plot_breaks <- seq(0, 10, by = 2)
#
# plot_range <- quantile(pred_beh$predicted_rt/1000, c(.005, .995))
# Predicted vs observed RT on correct trials: one panel per course and
# prediction method, with a per-panel linear fit, MAE, and n annotations.
p_rt_pred_v_obs <- ggplot(pred_beh[correct == 1], aes(x = predicted_rt/1000, y = rt/1000, colour = prediction_label)) +
facet_grid(course ~ prediction_label) +
# Identity line: points on it are perfectly predicted
geom_abline(slope = 1, intercept = 0, lty = 3, alpha = 0.75) +
geom_point(alpha = .1, size = .1, pch = ".") +
geom_smooth(method = "lm", formula = y ~ x, colour = "black") +
# Semi-opaque label drawn first as a backdrop, then the text on top.
# NOTE(review): colour = "NA" is the *string* "NA", not the NA constant —
# presumably intended to hide the label's own text; verify it renders as expected.
geom_label(data = pred_rt_error_avg,
aes(label = paste("MAE =", formatC(mae/1000, digits = 3, flag = "#"))),
x = plot_range[2], y = plot_range[1],
hjust = 1, colour = "NA", size = 3,
alpha = .9,
label.size = NA) +
geom_text(data = pred_rt_error_avg,
aes(label = paste("MAE =", formatC(mae/1000, digits = 3, flag = "#"))),
x = plot_range[2], y = plot_range[1],
hjust = 1, colour = "black", size = 3) +
# Same backdrop-plus-text trick for the per-panel observation count
geom_label(data = n_obs,
aes(label = paste("n =", scales::comma(N))),
x = plot_range[2],
y = plot_range[2],
hjust = 1, colour = "NA", size = 3,
alpha = .9,
label.size = NA) +
geom_text(data = n_obs,
aes(label = paste("n =", scales::comma(N))),
x = plot_range[2],
y = plot_range[2],
hjust = 1, colour = "black", size = 3) +
guides(colour = "none") +
labs(x = "Predicted RT (s)",
y = "Observed RT (s)") +
# Square panels on a shared 0-10 s range
coord_fixed(ratio = 1, xlim = plot_range, ylim = plot_range) +
scale_x_continuous(breaks = plot_breaks) +
scale_y_continuous(breaks = plot_breaks) +
scale_colour_manual(values = condition_colours)
p_rt_pred_v_obs
ggsave(plot = p_rt_pred_v_obs, file.path("..", "output", "rt_predicted_vs_observed.png"),
device = "png", width = 10, height = 4.5)
rm(p_rt_pred_v_obs)
Distribution of prediction error (truncated to [-5, 5] for readability):
# Distribution of signed RT prediction error (predicted - observed), per
# method and course, shown on [-5, 5] s for readability.
p_rt_pred_error <- ggplot(pred_rt_error, aes(x = rt_pred_error/1000, fill = prediction_label)) +
  facet_grid(prediction_label ~ course , scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .1) +
  guides(fill = "none") +
  labs(x = "RT prediction error in s (predicted - observed)",
       y = "Density") +
  coord_cartesian(xlim = c(-5, 5)) +
  scale_fill_manual(values = condition_colours)
p_rt_pred_error
ggsave(plot = p_rt_pred_error, file.path("..", "output", "rt_prediction_error.png"),
       device = "png", width = 5, height = 7.5)
rm(p_rt_pred_error)
# Distribution of *absolute* RT prediction error, per method and course,
# shown on [0, 5] s for readability.
p_rt_abs_pred_error <- ggplot(pred_rt_error, aes(x = abs_rt_pred_error/1000, fill = prediction_label)) +
  facet_grid(prediction_label ~ course, scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .1) +
  guides(fill = "none") +
  # Fixed label: this panel shows the absolute error (the signed-error label
  # was copied from the previous plot)
  labs(x = "Absolute RT prediction error in s",
       y = "Density") +
  coord_cartesian(xlim = c(0, 5)) +
  scale_fill_manual(values = condition_colours)
p_rt_abs_pred_error
ggsave(plot = p_rt_abs_pred_error, file.path("..", "output", "rt_absolute_prediction_error.png"),
       device = "png", width = 5, height = 7.5)
rm(p_rt_abs_pred_error)
# MAE per method and course (points + error bars), over a boxplot of the
# underlying per-trial absolute errors for context.
ggplot(pred_rt_error_avg, aes(x = prediction_label, y = mae/1000, colour = course)) +
geom_boxplot(data = pred_rt_error,
aes(y = abs_rt_pred_error/1000, group = interaction(course, prediction_label)),
colour = "grey70",
width = .25,
outlier.shape = NA,
position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin = mae/1000 - ae_se/1000, ymax = mae/1000 + ae_se/1000), width = 0, position = position_dodge(width = .5)) +
geom_point(position = position_dodge(width = .5)) +
coord_cartesian(ylim = c(0, 2.5)) +
labs(x = "Method",
y = "Absolute RT prediction error (in s)",
colour = "Course")
Fit a regression model on absolute RT prediction error.
# Mixed-effects model of absolute RT prediction error (French course):
# fixed effect of prediction method, random intercepts per learner and fact.
# The fitted model is cached on disk.
m_rt_pred_error_gl_file <- file.path("..", "data", "model_fits", "m_rt_pred_error_Grandes_Lignes.rda")
if (file.exists(m_rt_pred_error_gl_file)) {
load(m_rt_pred_error_gl_file)
} else {
# Fit on a 1M-row random sample for tractability.
# NOTE(review): no set.seed() before sample(), so refits use a different sample.
pred_gl_reg <- (
pred_rt_error
[course == "French"]
[sample(.N, 1e6)]
[, .(prediction_label, abs_rt_pred_error, user_id, fact_id)]
)
m_rt_pred_error_gl <- lmer(abs_rt_pred_error ~ prediction_label +
(1 | user_id) + (1 | fact_id),
data = pred_gl_reg,
control = lmerControl(optimizer ="bobyqa"))
save(m_rt_pred_error_gl, file = m_rt_pred_error_gl_file)
}
summary(m_rt_pred_error_gl)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
abs_rt_pred_error ~ prediction_label + (1 | user_id) + (1 | fact_id)
Data: pred_gl_reg
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: 18047298
Scaled residuals:
Min 1Q Median 3Q Max
-2.5110 -0.4053 -0.1761 0.0625 11.1385
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 220053 469.1
fact_id (Intercept) 194518 441.0
Residual 3843235 1960.4
Number of obs: 1000000, groups: user_id, 40820; fact_id, 22762
Fixed effects:
Estimate Std. Error df t value
(Intercept) 1433.560 6.509 53722.081 220.257
prediction_labelDomain -19.468 6.192 974447.696 -3.144
prediction_labelFact -118.481 6.218 974827.243 -19.056
prediction_labelLearner -42.334 6.270 975733.404 -6.751
prediction_labelFact & Learner -101.382 6.300 976077.378 -16.091
Pr(>|t|)
(Intercept) < 2e-16 ***
prediction_labelDomain 0.00167 **
prediction_labelFact < 2e-16 ***
prediction_labelLearner 1.46e-11 ***
prediction_labelFact & Learner < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) prdc_D prdc_F prdc_L
prdctn_lblD -0.474
prdctn_lblF -0.468 0.497
prdctn_lblL -0.462 0.492 0.490
prdctn_lF&L -0.455 0.490 0.489 0.486
Compare different prediction types to each other:
# Tukey all-pairs comparisons of the prediction methods (French model)
ht_rt_gl <- glht(m_rt_pred_error_gl, linfct = mcp(prediction_label = "Tukey"))
summary(ht_rt_gl)
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lmer(formula = abs_rt_pred_error ~ prediction_label + (1 | user_id) +
(1 | fact_id), data = pred_gl_reg, control = lmerControl(optimizer = "bobyqa"))
Linear Hypotheses:
Estimate Std. Error z value Pr(>|z|)
Domain - Default == 0 -19.468 6.192 -3.144 0.01438 *
Fact - Default == 0 -118.481 6.218 -19.056 < 0.001 ***
Learner - Default == 0 -42.334 6.270 -6.751 < 0.001 ***
Fact & Learner - Default == 0 -101.382 6.300 -16.091 < 0.001 ***
Fact - Domain == 0 -99.013 6.225 -15.905 < 0.001 ***
Learner - Domain == 0 -22.867 6.278 -3.642 0.00244 **
Fact & Learner - Domain == 0 -81.914 6.308 -12.986 < 0.001 ***
Learner - Fact == 0 76.146 6.304 12.079 < 0.001 ***
Fact & Learner - Fact == 0 17.099 6.329 2.702 0.05368 .
Fact & Learner - Learner == 0 -59.047 6.374 -9.264 < 0.001 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)
Use emmeans to estimate the standardised effect size for each contrast:
# Estimated marginal means per prediction method (asymptotic df for speed)
emm_rt_gl <- emmeans(m_rt_pred_error_gl, "prediction_label", lmer.df = "asymptotic")
# Sigma calculation follows the example in the eff_size documentation (see also Westfall et al., 2014)
# Total SD = sqrt of the summed variance components (random effects + residual)
vc_rt_gl <- as.data.frame(VarCorr(m_rt_pred_error_gl))
sigma_rt_gl <- sqrt(sum(vc_rt_gl$vcov))
eff_size_rt_gl <- eff_size(emm_rt_gl, sigma = sigma_rt_gl, edf = Inf) # Choice of edf does not affect effect size, only the SE
eff_size_rt_gl
contrast effect.size SE df asymp.LCL asymp.UCL
Default - Domain 0.00943 0.00300 Inf 0.00355 0.01532
Default - Fact 0.05742 0.00301 Inf 0.05151 0.06332
Default - Learner 0.02052 0.00304 Inf 0.01456 0.02647
Default - Fact & Learner 0.04913 0.00305 Inf 0.04315 0.05512
Domain - Fact 0.04798 0.00302 Inf 0.04207 0.05390
Domain - Learner 0.01108 0.00304 Inf 0.00512 0.01705
Domain - Fact & Learner 0.03970 0.00306 Inf 0.03371 0.04569
Fact - Learner -0.03690 0.00306 Inf -0.04289 -0.03091
Fact - Fact & Learner -0.00829 0.00307 Inf -0.01430 -0.00227
Learner - Fact & Learner 0.02862 0.00309 Inf 0.02256 0.03467
sigma used for effect sizes: 2063
Degrees-of-freedom method: inherited from asymptotic when re-gridding
Confidence level used: 0.95
Inspect the model’s residuals:
# Normal Q-Q plot of the residuals, with reference line, plus the default
# fitted-vs-residuals plot for the lmer fit
qqnorm(resid(m_rt_pred_error_gl))
qqline(resid(m_rt_pred_error_gl), col = "red")
plot(m_rt_pred_error_gl)
# Same mixed-effects model of absolute RT prediction error, for the English
# course (Stepping Stones). Cached on disk like the French fit.
m_rt_pred_error_ss_file <- file.path("..", "data", "model_fits", "m_rt_pred_error_Stepping_Stones.rda")
if (file.exists(m_rt_pred_error_ss_file)) {
load(m_rt_pred_error_ss_file)
} else {
# Fit on a 1M-row random sample for tractability.
# NOTE(review): no set.seed() before sample(), so refits use a different sample.
pred_ss_reg <- (
pred_rt_error
[course == "English"]
[sample(.N, 1e6)]
[, .(prediction_label, abs_rt_pred_error, user_id, fact_id)]
)
m_rt_pred_error_ss <- lmer(abs_rt_pred_error ~ prediction_label +
(1 | user_id) + (1 | fact_id),
data = pred_ss_reg,
control = lmerControl(optimizer ="bobyqa")
)
save(m_rt_pred_error_ss, file = m_rt_pred_error_ss_file)
}
summary(m_rt_pred_error_ss)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
abs_rt_pred_error ~ prediction_label + (1 | user_id) + (1 | fact_id)
Data: pred_ss_reg
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: 18097553
Scaled residuals:
Min 1Q Median 3Q Max
-1.6170 -0.3973 -0.1997 0.0215 10.9768
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 150645 388.1
fact_id (Intercept) 72213 268.7
Residual 4075141 2018.7
Number of obs: 1000000, groups: user_id, 85884; fact_id, 45600
Fixed effects:
Estimate Std. Error df t value
(Intercept) 1312.875 5.205 161344.300 252.225
prediction_labelDomain -27.214 6.416 984927.056 -4.242
prediction_labelFact -58.734 6.448 985192.856 -9.109
prediction_labelLearner -41.073 6.467 984764.807 -6.351
prediction_labelFact & Learner -64.932 6.493 985035.484 -10.000
Pr(>|t|)
(Intercept) < 2e-16 ***
prediction_labelDomain 2.22e-05 ***
prediction_labelFact < 2e-16 ***
prediction_labelLearner 2.14e-10 ***
prediction_labelFact & Learner < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) prdc_D prdc_F prdc_L
prdctn_lblD -0.617
prdctn_lblF -0.613 0.498
prdctn_lblL -0.609 0.496 0.494
prdctn_lF&L -0.606 0.494 0.492 0.491
Compare different prediction types to each other:
# Tukey all-pairs comparisons of the prediction methods (English model)
ht_rt_ss <- glht(m_rt_pred_error_ss, linfct = mcp(prediction_label = "Tukey"))
summary(ht_rt_ss)
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lmer(formula = abs_rt_pred_error ~ prediction_label + (1 | user_id) +
(1 | fact_id), data = pred_ss_reg, control = lmerControl(optimizer = "bobyqa"))
Linear Hypotheses:
Estimate Std. Error z value Pr(>|z|)
Domain - Default == 0 -27.214 6.416 -4.242 < 0.001 ***
Fact - Default == 0 -58.734 6.448 -9.109 < 0.001 ***
Learner - Default == 0 -41.073 6.467 -6.351 < 0.001 ***
Fact & Learner - Default == 0 -64.932 6.493 -10.000 < 0.001 ***
Fact - Domain == 0 -31.520 6.445 -4.890 < 0.001 ***
Learner - Domain == 0 -13.859 6.466 -2.143 0.20177
Fact & Learner - Domain == 0 -37.717 6.491 -5.811 < 0.001 ***
Learner - Fact == 0 17.661 6.497 2.718 0.05130 .
Fact & Learner - Fact == 0 -6.198 6.521 -0.950 0.87701
Fact & Learner - Learner == 0 -23.859 6.540 -3.648 0.00252 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)
Use emmeans to estimate the standardised effect size for each contrast:
# Estimated marginal means per prediction method (asymptotic df for speed)
emm_rt_ss <- emmeans(m_rt_pred_error_ss, "prediction_label", lmer.df = "asymptotic")
# Sigma calculation follows the example in the eff_size documentation (see also Westfall et al., 2014)
# Total SD = sqrt of the summed variance components (random effects + residual)
vc_rt_ss <- as.data.frame(VarCorr(m_rt_pred_error_ss))
sigma_rt_ss <- sqrt(sum(vc_rt_ss$vcov))
eff_size_rt_ss <- eff_size(emm_rt_ss, sigma = sigma_rt_ss, edf = Inf) # Choice of edf does not affect effect size, only the SE
eff_size_rt_ss
contrast effect.size SE df asymp.LCL asymp.UCL
Default - Domain 0.01313 0.00309 Inf 0.007061 0.01919
Default - Fact 0.02833 0.00311 Inf 0.022235 0.03443
Default - Learner 0.01981 0.00312 Inf 0.013698 0.02593
Default - Fact & Learner 0.03132 0.00313 Inf 0.025182 0.03746
Domain - Fact 0.01520 0.00311 Inf 0.009110 0.02130
Domain - Learner 0.00668 0.00312 Inf 0.000572 0.01280
Domain - Fact & Learner 0.01819 0.00313 Inf 0.012057 0.02433
Fact - Learner -0.00852 0.00313 Inf -0.014661 -0.00238
Fact - Fact & Learner 0.00299 0.00315 Inf -0.003175 0.00915
Learner - Fact & Learner 0.01151 0.00315 Inf 0.005325 0.01769
sigma used for effect sizes: 2073
Degrees-of-freedom method: inherited from asymptotic when re-gridding
Confidence level used: 0.95
Inspect the model’s residuals:
# Normal Q-Q plot of the residuals, with reference line, plus the default
# fitted-vs-residuals plot for the lmer fit
qqnorm(resid(m_rt_pred_error_ss))
qqline(resid(m_rt_pred_error_ss), col = "red")
plot(m_rt_pred_error_ss)
# Tidy the Tukey confidence intervals from both courses into one table
ht_rt_gl_tidy <- broom::tidy(confint(ht_rt_gl))
ht_rt_ss_tidy <- broom::tidy(confint(ht_rt_ss))
setDT(ht_rt_gl_tidy)
setDT(ht_rt_ss_tidy)
ht_rt_both_tidy <- rbind(ht_rt_gl_tidy[, course := "French"],
ht_rt_ss_tidy[, course := "English"])
# Forest plot of the pairwise contrasts with family-wise 95% CIs
p_rt_pred_error_comp <- ggplot(ht_rt_both_tidy, aes(x = contrast, y = estimate, ymin = conf.low, ymax = conf.high, colour = course)) +
geom_hline(yintercept = 0, linetype = "11", colour = "grey60") +
geom_errorbar(width = 0.1) +
geom_point() +
labs(x = "Linear hypotheses",
y = "Estimate",
caption = "Tukey's range test. Error bars show 95% family-wise confidence level.",
colour = "Course") +
coord_flip()
p_rt_pred_error_comp
ggsave(plot = p_rt_pred_error_comp, file.path("..", "output", "rt_prediction_error_comparisons.png"),
device = "png", width = 7.5, height = 5)
rm(p_rt_pred_error_comp)
# Rank the methods from worst (rank 1) to best MAE within each course
pred_rt_error_avg[, prediction_rank := frank(-mae), by = .(course)]
# Significance-bracket annotations for the summary plot below.
# start/end index the ranked methods (1 = worst ... 5 = best); y staggers the
# brackets vertically above the tallest MAE.
# NOTE(review): the p-value labels are transcribed by hand from the glht
# output above — they must be updated if the models are refit.
annotation_df_ss <- data.table(
course = rep("English", 10),
start = c(1, 1, 1, 1,
2, 2, 2,
3, 3,
4
),
end = c(2, 3, 4, 5,
3, 4, 5,
4, 5,
5
),
y = seq(max(pred_rt_error_avg$mae)*1.01 + 45, max(pred_rt_error_avg$mae)*1.01, by = -5),
label = c("p < .001", "p < .001", "p < .001", "p < .001",
"n.s.", "p < .001", "p < .001",
"n.s.", "p < .01",
"n.s.")
)
annotation_df_gl <- data.table(
course = rep("French", 10),
start = c(1, 1, 1, 1,
2, 2, 2,
3, 3,
4
),
end = c(2, 3, 4, 5,
3, 4, 5,
4, 5,
5
),
y = seq(max(pred_rt_error_avg$mae)*1.01 + 45, max(pred_rt_error_avg$mae)*1.01, by = -5),
label = c("p < .05", "p < .001", "p < .001", "p < .001",
"p < .01", "p < .001", "p < .001",
"p < .001", "p < .001",
"n.s.")
)
annotation_df_rt <- rbind(annotation_df_ss, annotation_df_gl)
# Order the factor so the legend lists significance levels from strongest to n.s.
annotation_df_rt[, label := factor(label, levels = c("p < .001", "p < .01", "p < .05", "n.s."))]
# Summary figure: MAE per method (ranked worst to best) per course, with
# significance brackets between methods.
p_rt_pred_error_summary <- ggplot(pred_rt_error_avg, aes(x = prediction_rank, y = mae)) +
facet_grid(~ course) +
geom_line(data = annotation_df_rt,
aes(x = 1, y = 1300, lty = label, alpha = label, colour = NULL)) + # Dummy line to get legend
geom_line(aes(colour = course, group = course)) +
geom_errorbar(aes(ymin = mae - ae_se, ymax = mae + ae_se), width = 0) +
geom_point(aes(colour = course, group = course)) +
# Method names drawn just below each point
geom_label(aes(label = prediction_label),
colour = "black",
alpha = .9,
label.size = NA,
nudge_y = -15) +
labs(x = NULL,
y = "Absolute prediction error:\nresponse time (s)",
colour = "Course") +
scale_x_continuous(expand = expansion(add = .75), breaks = NULL) +
scale_y_continuous(labels = scales::comma_format(big.mark = ".")) +
scale_colour_manual(values = dataset_colours) +
# Linetype and alpha both encode the significance level, shared legend name
scale_linetype_manual(values = c("p < .001" = 1,
"p < .01" = 5,
"p < .05" = 2,
"n.s." = 3),
name = "Pairwise comparison:") +
scale_alpha_manual(values = c("p < .001" = 1,
"p < .01" = .75,
"p < .05" = .5,
"n.s." = .25),
name = "Pairwise comparison:") +
guides(colour = "none") +
# Significance brackets; manual = TRUE uses the annotation data directly
# (the "Ignoring unknown aesthetics" warning it emits is expected)
ggsignif::geom_signif(data = annotation_df_rt,
aes(xmin = start, xmax = end, annotations = "",
y_position = y, lty = label, alpha = label),
tip_length = 0,
manual = TRUE) +
theme(legend.position = "bottom",
legend.justification = "right")
Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, and y_position
p_rt_pred_error_summary
# ggsave() without a plot argument saves the last plot displayed
ggsave(file.path("..", "output", "rt_absolute_prediction_error_summary.png"),
device = "png", width = 10, height = 4)
How big was the improvement from worst to best prediction method?
French:
# Absolute change
ht_rt_gl_tidy[contrast == "Fact - Default", estimate[1]]
[1] -118.4807
# % change
scales::percent(
ht_rt_gl_tidy[contrast == "Fact - Default", estimate[1]] / fixef(m_rt_pred_error_gl)[[1]],
accuracy = .1)
[1] "-8.3%"
# Associated standardised effect size
eff_size_rt_gl_tidy <- broom::tidy(eff_size_rt_gl) |> as.data.table()
eff_size_rt_gl_tidy[contrast == "Default - Fact", estimate]
[1] 0.05741889
English:
# Absolute change
ht_rt_ss_tidy[contrast == "Fact & Learner - Default", estimate[1]]
[1] -64.93171
# % change
scales::percent(
ht_rt_ss_tidy[contrast == "Fact & Learner - Default", estimate[1]] / fixef(m_rt_pred_error_ss)[[1]],
accuracy = .1)
[1] "-4.9%"
# Associated standardised effect size
eff_size_rt_ss_tidy <- broom::tidy(eff_size_rt_ss) |> as.data.table()
eff_size_rt_ss_tidy[contrast == "Default - Fact & Learner", estimate]
[1] 0.03132014
The mean absolute prediction error is a linear error metric. We could also choose a non-linear metric that penalises larger deviations more heavily. A common choice would be RMSE (root mean squared error).
# RMSE per course and method (alongside MAE) — a non-linear metric that
# penalises large deviations more heavily than MAE does
pred_rt_error[, .(mae = mean(abs_rt_pred_error),
rmse = sqrt(mean(rt_pred_error^2))),
by = .(course, prediction_label)] |>
ggplot(aes(x = prediction_label, y = rmse, group = course, colour = course)) +
geom_line() +
geom_point()
# Linearly expand a [0, 1] value about its midpoint by a factor of
# (1 + dodge): 0 maps to -dodge/2 and 1 maps to 1 + dodge/2. Used to dodge
# overplotted points in the accuracy figures.
#
# y:     numeric vector (typically in [0, 1])
# dodge: total expansion amount (default .1)
# Returns a numeric vector the same length as y.
# (The original defined this identical function twice in a row; the
# duplicate definition has been removed.)
plot_dodge <- function(y, dodge = .1) {
  y * (1 + dodge) - dodge / 2
}
# Predicted accuracy vs observed response accuracy (0/1, jittered vertically
# so the two bands are visible), per course and prediction method.
p_acc_pred_v_obs <- ggplot(pred_beh, aes(x = predicted_accuracy, y = correct, group = prediction_label, colour = prediction_label, fill = prediction_label)) +
facet_grid(course ~ prediction_label) +
# Fixed jitter seed keeps the plot reproducible across renders
geom_point(aes(y = correct),
position = position_jitter(width = 0, height = .025, seed = 123),
size = .001, pch = ".", alpha = .1) +
labs(x = "Predicted accuracy",
y = "Response accuracy",
colour = "Prediction method",
fill = "Prediction method") +
guides(colour = "none",
fill = "none") +
scale_x_continuous(breaks = seq(0, 1, by = .25), labels = scales::percent_format()) +
scale_y_continuous(breaks = seq(0, 1, by = .25), labels = scales::percent_format()) +
scale_colour_manual(values = condition_colours) +
scale_fill_manual(values = condition_colours) +
# clip = "off" so jittered points just outside [0, 1] are not cut off
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1), clip = "off")
p_acc_pred_v_obs
ggsave(plot = p_acc_pred_v_obs, file.path("..", "output", "acc_predicted_vs_observed.png"),
device = "png", width = 10, height = 4.5)
rm(p_acc_pred_v_obs)
# Accuracy prediction error for all trials; `:=` adds the columns by reference
pred_acc_error <- (pred_beh
[, acc_pred_error := predicted_accuracy - correct]
[, abs_acc_pred_error := abs(acc_pred_error)])
# Mean absolute error per course and method.
# NOTE(review): ae_se divides sd by .N, not sqrt(.N) — confirm this is intended
pred_acc_error_avg <- pred_acc_error[, .(mae = mean(abs_acc_pred_error),
ae_se = sd(abs_acc_pred_error)/.N),
by = .(course, prediction_label)]
n_obs <- pred_acc_error[, .N, by = .(course, prediction_label)]
Distribution of prediction error:
# Distribution of signed accuracy prediction error (predicted - observed),
# per method and course.
p_acc_pred_error <- ggplot(pred_acc_error, aes(x = acc_pred_error, fill = prediction_label)) +
  facet_grid(prediction_label ~ course , scales = "free_y") +
  # after_stat() replaces the deprecated ..density.. notation
  geom_histogram(aes(y = after_stat(density)), binwidth = .01) +
  guides(fill = "none") +
  labs(x = "Accuracy prediction error (predicted - observed)",
       y = "Density") +
  coord_cartesian(xlim = c(-1, 1)) +
  scale_fill_manual(values = condition_colours)
p_acc_pred_error
ggsave(plot = p_acc_pred_error, file.path("..", "output", "acc_prediction_error.png"),
       device = "png", width = 5, height = 7.5)
rm(p_acc_pred_error)
# MAE of accuracy predictions per method and course (points + error bars),
# over a boxplot of the underlying per-trial absolute errors.
# (This chunk appeared twice verbatim in the original; the duplicate
# second copy has been removed.)
ggplot(pred_acc_error_avg, aes(x = prediction_label, y = mae, colour = course)) +
  geom_boxplot(data = pred_acc_error,
               aes(y = abs_acc_pred_error, group = interaction(course, prediction_label)),
               colour = "grey70",
               width = .25,
               outlier.shape = NA,
               position = position_dodge(width = .5)) +
  geom_errorbar(aes(ymin = mae - ae_se, ymax = mae + ae_se), width = 0, position = position_dodge(width = .5)) +
  geom_point(position = position_dodge(width = .5)) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(x = "Method",
       y = "Absolute accuracy prediction error",
       colour = "Course")
Fit a regression model.
# Mixed-effects model of absolute accuracy prediction error (French course):
# fixed effect of method, random intercepts per learner and fact. Cached.
m_acc_pred_error_gl_file <- file.path("..", "data", "model_fits", "m_acc_pred_error_Grandes_Lignes_new.rda")
if (file.exists(m_acc_pred_error_gl_file)) {
load(m_acc_pred_error_gl_file)
} else {
# Fit on a 1M-row random sample for tractability.
# NOTE(review): no set.seed() before sample(), so refits use a different sample.
pred_gl_reg <- (
pred_acc_error
[course == "French"]
[sample(.N, 1e6)]
[, .(prediction_label, abs_acc_pred_error, user_id, fact_id)]
)
m_acc_pred_error_gl <- lmer(abs_acc_pred_error ~ prediction_label +
(1 | user_id) + (1 | fact_id),
data = pred_gl_reg,
control = lmerControl(optimizer ="bobyqa"))
save(m_acc_pred_error_gl, file = m_acc_pred_error_gl_file)
}
summary(m_acc_pred_error_gl)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: abs_acc_pred_error ~ prediction_label + (1 | user_id) + (1 |
fact_id)
Data: pred_gl_reg
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: -1051298
Scaled residuals:
Min 1Q Median 3Q Max
-4.5692 -0.5355 -0.0828 0.4664 4.9073
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 0.001687 0.04108
fact_id (Intercept) 0.002174 0.04662
Residual 0.019154 0.13840
Number of obs: 1000000, groups: user_id, 41014; fact_id, 22541
Fixed effects:
Estimate Std. Error df t value
(Intercept) 4.780e-01 5.493e-04 5.561e+04 870.21
prediction_labelDomain -2.752e-02 4.385e-04 9.736e+05 -62.76
prediction_labelFact -3.682e-02 4.409e-04 9.734e+05 -83.50
prediction_labelLearner -2.963e-02 4.447e-04 9.745e+05 -66.63
prediction_labelFact & Learner -4.948e-02 4.466e-04 9.743e+05 -110.80
Pr(>|t|)
(Intercept) <2e-16 ***
prediction_labelDomain <2e-16 ***
prediction_labelFact <2e-16 ***
prediction_labelLearner <2e-16 ***
prediction_labelFact & Learner <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) prdc_D prdc_F prdc_L
prdctn_lblD -0.400
prdctn_lblF -0.391 0.498
prdctn_lblL -0.387 0.494 0.491
prdctn_lF&L -0.379 0.492 0.490 0.487
Compare different prediction types to each other:
qqnorm(resid(m_acc_pred_error_gl))
qqline(resid(m_acc_pred_error_gl), col = "red")
Use emmeans to estimate the standardised effect size for
each contrast:
plot(m_acc_pred_error_gl)
Inspect the model’s residuals:
# NOTE(review): these three diagnostic plots duplicate the ones produced a
# few lines earlier -- likely a notebook-export artifact; harmless to re-run.
qqnorm(resid(m_acc_pred_error_gl))
qqline(resid(m_acc_pred_error_gl), col = "red")
plot(m_acc_pred_error_gl)
# Fit (or load from cache) the same mixed-effects model of absolute accuracy
# prediction error for the Stepping Stones (English) course.
m_acc_pred_error_ss_file <- file.path("..", "data", "model_fits", "m_acc_pred_error_Stepping_Stones_new.rda")
if (!file.exists(m_acc_pred_error_ss_file)) {
  # Fit on a random subsample of 1e6 observations to keep fitting tractable
  pred_ss_reg <- pred_acc_error[course == "English"][sample(.N, 1e6)][
    , .(prediction_label, abs_acc_pred_error, user_id, fact_id)
  ]
  m_acc_pred_error_ss <- lmer(
    abs_acc_pred_error ~ prediction_label + (1 | user_id) + (1 | fact_id),
    data = pred_ss_reg,
    control = lmerControl(optimizer = "bobyqa")
  )
  save(m_acc_pred_error_ss, file = m_acc_pred_error_ss_file)
} else {
  load(m_acc_pred_error_ss_file)
}
summary(m_acc_pred_error_ss)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: abs_acc_pred_error ~ prediction_label + (1 | user_id) + (1 |
fact_id)
Data: pred_ss_reg
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: -1182614
Scaled residuals:
Min 1Q Median 3Q Max
-4.5639 -0.4544 -0.0844 0.3632 6.8129
Random effects:
Groups Name Variance Std.Dev.
user_id (Intercept) 0.001618 0.04022
fact_id (Intercept) 0.001753 0.04187
Residual 0.016321 0.12775
Number of obs: 1000000, groups: user_id, 86095; fact_id, 45414
Fixed effects:
Estimate Std. Error df t value
(Intercept) 4.472e-01 4.203e-04 1.179e+05 1064.06
prediction_labelDomain -3.082e-02 4.115e-04 9.634e+05 -74.89
prediction_labelFact -3.611e-02 4.136e-04 9.631e+05 -87.31
prediction_labelLearner -3.341e-02 4.150e-04 9.628e+05 -80.50
prediction_labelFact & Learner -4.628e-02 4.163e-04 9.617e+05 -111.16
Pr(>|t|)
(Intercept) <2e-16 ***
prediction_labelDomain <2e-16 ***
prediction_labelFact <2e-16 ***
prediction_labelLearner <2e-16 ***
prediction_labelFact & Learner <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) prdc_D prdc_F prdc_L
prdctn_lblD -0.489
prdctn_lblF -0.483 0.497
prdctn_lblL -0.481 0.496 0.493
prdctn_lF&L -0.476 0.494 0.492 0.491
Compare different prediction types to each other:
# Q-Q plot of the English-course model residuals, with reference line
qqnorm(resid(m_acc_pred_error_ss))
qqline(resid(m_acc_pred_error_ss), col = "red")
Use emmeans to estimate the standardised effect size for
each contrast:
plot(m_acc_pred_error_ss)
Inspect the model’s residuals:
# Tidy the 95% family-wise confidence intervals of the pairwise contrasts
# for each course and stack them into one table, tagged by course.
# (ht_acc_gl / ht_acc_ss are computed elsewhere in the notebook.)
ht_acc_gl_tidy <- as.data.table(broom::tidy(confint(ht_acc_gl)))
ht_acc_ss_tidy <- as.data.table(broom::tidy(confint(ht_acc_ss)))
ht_acc_both_tidy <- rbind(
  ht_acc_gl_tidy[, course := "French"],
  ht_acc_ss_tidy[, course := "English"]
)
plot(m_acc_pred_error_ss)
# NOTE(review): the statements below repeat the confint-tidying already run a
# few lines earlier; re-running recreates the same objects, so it is
# redundant but harmless (likely a notebook-export artifact).
ht_acc_gl_tidy <- broom::tidy(confint(ht_acc_gl))
ht_acc_ss_tidy <- broom::tidy(confint(ht_acc_ss))
setDT(ht_acc_gl_tidy)
setDT(ht_acc_ss_tidy)
ht_acc_both_tidy <- rbind(ht_acc_gl_tidy[, course := "French"],
ht_acc_ss_tidy[, course := "English"])
# Plot each pairwise contrast estimate with its family-wise CI, per course.
p_acc_pred_error_comp <- ggplot(
  ht_acc_both_tidy,
  aes(x = contrast, y = estimate,
      ymin = conf.low, ymax = conf.high,
      colour = course)
) +
  # Reference line at zero: contrasts crossing it are not distinguishable
  geom_hline(yintercept = 0, linetype = "11", colour = "grey60") +
  geom_errorbar(width = 0.1) +
  geom_point() +
  labs(
    x = "Linear hypotheses",
    y = "Estimate",
    caption = "Tukey's range test. Error bars show 95% family-wise confidence level.",
    colour = "Course"
  ) +
  coord_flip()

p_acc_pred_error_comp

ggsave(
  plot = p_acc_pred_error_comp,
  file.path("..", "output", "acc_prediction_error_comparisons.png"),
  device = "png", width = 7.5, height = 5
)
rm(p_acc_pred_error_comp)
pred_acc_error_avg[, prediction_rank := frank(-mae), by = .(course)]
# Significance annotations for the English course: all 10 pairwise
# comparisons among the 5 prediction methods (ranks 1-5) were p < .001.
# start/end enumerate every pair (1,2), (1,3), ..., (4,5); y positions
# stack the brackets in .0025 steps just above the largest MAE.
annotation_df_ss <- data.table(
  course = "English",
  start = rep(1:4, times = 4:1),
  end = c(2:5, 3:5, 4:5, 5),
  y = seq(max(pred_acc_error_avg$mae) * 1.01 + .0225,
          max(pred_acc_error_avg$mae) * 1.01,
          by = -.0025),
  label = rep("p < .001", 10)
)
# Significance annotations for the French course: all 10 pairwise
# comparisons among the 5 prediction methods (ranks 1-5) were p < .001.
# Same bracket layout as the English course for visual alignment.
annotation_df_gl <- data.table(
  course = "French",
  start = rep(1:4, times = 4:1),
  end = c(2:5, 3:5, 4:5, 5),
  y = seq(max(pred_acc_error_avg$mae) * 1.01 + .0225,
          max(pred_acc_error_avg$mae) * 1.01,
          by = -.0025),
  label = rep("p < .001", 10)
)
annotation_df_acc <- rbind(annotation_df_ss, annotation_df_gl)
# Declare all significance levels as factor levels, even though only
# "p < .001" occurs here, so the full legend can be rendered later via
# drop = FALSE in the linetype/alpha scales.
annotation_df_acc[, label := factor(label, levels = c("p < .001", "p < .01", "p < .05", "n.s."))]
# Summary figure: mean absolute accuracy prediction error per prediction
# method, ordered by rank within each course, with pairwise-significance
# brackets drawn above the points.
p_acc_pred_error_summary <- ggplot(pred_acc_error_avg, aes(x = prediction_rank, y = mae)) +
facet_grid(~ course) +
# Invisible dummy layer: its only purpose is to force a linetype/alpha
# legend for the significance brackets, which geom_signif(manual = TRUE)
# would not generate on its own
geom_line(data = annotation_df_acc,
aes(x = 1, y = .45, lty = label, alpha = label, colour = NULL)) + # Dummy line to get legend
geom_line(aes(colour = course, group = course)) +
geom_errorbar(aes(ymin = mae - ae_se, ymax = mae + ae_se), width = 0) +
geom_point(aes(colour = course, group = course)) +
# Method names drawn as borderless labels below each point
geom_label(aes(label = prediction_label),
colour = "black",
alpha = .9,
label.size = NA,
nudge_y = -.007) +
labs(x = NULL,
y = "Absolute prediction error:\nresponse accuracy",
colour = "Course") +
scale_x_continuous(expand = expansion(add = .75), breaks = NULL) +
scale_colour_manual(values = dataset_colours) +
# drop = FALSE keeps all four significance levels in the legend even
# though only "p < .001" appears in the data
scale_linetype_manual(values = c("p < .001" = 1,
"p < .01" = 5,
"p < .05" = 2,
"n.s." = 3),
drop = FALSE,
name = "Pairwise comparison:") +
scale_alpha_manual(values = c("p < .001" = 1,
"p < .01" = .75,
"p < .05" = .5,
"n.s." = .25),
drop = FALSE,
name = "Pairwise comparison:") +
guides(colour = "none") +
# Significance brackets; the "unknown aesthetics" warning this triggers is
# expected with manual = TRUE and can be ignored
ggsignif::geom_signif(data = annotation_df_acc,
aes(xmin = start, xmax = end, annotations = "",
y_position = y, lty = label, alpha = label),
tip_length = 0,
manual = TRUE) +
theme(legend.position = "bottom",
legend.justification = "right")
Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, and y_position
p_acc_pred_error_summary
# Pass the plot object explicitly instead of relying on ggsave()'s
# last-displayed-plot default; this matches the explicit `plot =` usage of
# the earlier ggsave() call and is robust if the chunk is re-run in
# isolation after another plot has been displayed.
ggsave(file.path("..", "output", "acc_absolute_prediction_error_summary.png"),
       plot = p_acc_pred_error_summary,
       device = "png", width = 10, height = 4)
How big was the improvement from worst to best prediction method?
English:
# Absolute change
# Improvement of the best method (Fact & Learner) over the Default
# prediction, taken from the tidied Tukey contrasts
ht_acc_ss_tidy[contrast == "Fact & Learner - Default", estimate[1]]
[1] -0.0462795
# % change
# Express the contrast relative to the model intercept (the estimated mean
# absolute error of the reference/Default prediction method)
scales::percent(
ht_acc_ss_tidy[contrast == "Fact & Learner - Default", estimate[1]] / fixef(m_acc_pred_error_ss)[[1]],
accuracy = .1)
[1] "-10.3%"
# Associated standardised effect size
# NOTE(review): eff_size_acc_ss is computed elsewhere in the notebook
# (presumably via emmeans::eff_size) -- confirm against that section
eff_size_acc_ss_tidy <- broom::tidy(eff_size_acc_ss) |> as.data.table()
eff_size_acc_ss_tidy[contrast == "Default - Fact & Learner", estimate]
[1] 0.3297989
Combine the accuracy and RT prediction error figures:
# Combine the accuracy and RT prediction-error summary figures into one
# two-panel figure (patchwork) with a shared bottom legend, then save it
(p_acc_pred_error_summary + p_rt_pred_error_summary) +
plot_layout(ncol = 1, guides = "collect") +
plot_annotation(tag_levels = "A") &
theme(legend.position = "bottom")
ggsave(file.path("..", "output", "beh_absolute_prediction_error_summary.png"),
device = "png", width = 10, height = 8)
# NOTE(review): this repeats the figure composition and ggsave() just above,
# writing the same file again -- redundant but harmless (likely a
# notebook-export artifact)
(p_acc_pred_error_summary + p_rt_pred_error_summary) +
plot_layout(ncol = 1, guides = "collect") +
plot_annotation(tag_levels = "A") &
theme(legend.position = "bottom")
ggsave(file.path("..", "output", "beh_absolute_prediction_error_summary.png"),
device = "png", width = 10, height = 8)
sessionInfo()